/**
*  @license
* Copyright 2022 JsData. All rights reserved.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.

* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ==========================================================================
*/
import DataFrame from "../core/frame"
import { ArrayType1D, ArrayType2D } from "../shared/types"
import { variance, std, median, mode } from 'mathjs';
import  concat from "../transformers/concat"
import Series from "../core/series";



/**
 * Performs groupby operations on the row data of a DataFrame and exposes the
 * aggregate functions (sum, mean, count, ...) evaluated per group.
 * @param keyCol Names of the columns the rows are grouped by
 * @param data Row-major values of the DataFrame (`null` on instances derived via `col`)
 * @param columnName Names of all columns in the DataFrame
 * @param colDtype Dtype of each column, aligned with `columnName`
 * @param colIndex Positions of the `keyCol` columns within each row
 */
export default class Groupby {
  // group key -> { column name -> values of that column for the rows in the group }
  colDict: { [key: string ]: {} } = {}
  keyCol: ArrayType1D
  data?: ArrayType2D | null
  columnName: ArrayType1D
  colDtype: ArrayType1D
  colIndex: ArrayType1D
  groupDict?: any
  // columns selected through `col`; undefined until `col` has been called
  groupColNames?: Array<string>
  // group key -> the original (un-joined) key values that produced it
  keyToValue: {
    [key: string] : ArrayType1D
  } = {}
  
  constructor(keyCol: ArrayType1D, data: ArrayType2D | null, columnName: ArrayType1D, colDtype:ArrayType1D, colIndex: ArrayType1D) {

    this.keyCol = keyCol;
    this.data = data;
    this.columnName = columnName;
    this.colDtype = colDtype;
    this.colIndex = colIndex

  }

  /**
   * Own-property test used for every dictionary lookup keyed by user data.
   * A plain `in` / truthiness check would also match inherited members such as
   * "constructor" or "toString" when they appear as group key values.
   * @param obj dictionary to inspect
   * @param key property name to look for
   * @returns true when `key` is an own property of `obj`
   */
  private static hasOwn(obj: object, key: string): boolean {
    return Object.prototype.hasOwnProperty.call(obj, key)
  }

  /**
   * Generate the group object data needed for group operations
   * let data = [ [ 1, 2, 3 ], [ 4, 5, 6 ], [ 20, 30, 40 ], [ 39, 89, 78 ] ];
   * let cols = [ "A", "B", "C" ];
   * let df = new dfd.DataFrame(data, { columns: cols });
   * let groupDf = df.groupby([ "A" ]);
   * The following internal object is generated and saved to this.colDict
   * {
   *  '1': { A: [ 1 ], B: [ 2 ], C: [ 3 ] },
   *  '4': { A: [ 4 ], B: [ 5 ], C: [ 6 ] },
   *  '20': { A: [ 20 ], B: [ 30 ], C: [ 40 ] },
   *  '39': { A: [ 39 ], B: [ 89 ], C: [ 78 ] }
   * }
   * A groupby using more than one column is indexed by joining the key
   * values with '-', e.g. for df.groupby(['A','B'])
   * the result will look like this
   * {
   *  '1-2': {A: [ 1 ], B: [ 2 ], C: [ 3 ]},
   *  '4-5': {A: [ 4 ], B: [ 5 ], C: [ 6 ]}
   * }
   * but analysing a specific column like this
   * df.groupby(['A','B']).col(['C'])
   * gives the following internal result
   * {
   *  '1-2': { C: [ 3 ]},
   *  '4-5': {C: [ 6 ]}
   * }
   * When building the resulting DataFrame for this data we have lost track
   * of the values of columns A and B.
   * They could be recovered with split('-') on the object keys,
   * e.g. '1-2'.split('-') gives the values for A and B.
   * But the values of A and B may themselves contain '-',
   * e.g.
   * {
   *  '1--2-': { C: [ 3 ]},
   *  '4--5-': {C: [ 6 ]}
   * }
   * in which case `.split('-')` does not work.
   * Hence the key-value object `keyToValue` stores each group key together
   * with the original key values it was built from.
   * NOTE: A previous implementation used a graph representation of the
   * groupby data and depth first search (DFS); a plain key-value object is
   * used as a hashmap instead to reduce search time.
   * @returns this instance, with `colDict` and `keyToValue` populated
   */
  group():  Groupby{
    const keyToValue: { [key: string] : ArrayType1D } = {}
    const colDict: { [key: string ]: { [key: string ]: ArrayType1D } } = {}

    // `data` is null on instances created by `col`; grouping then yields no groups
    for (const row of this.data ?? []) {
      const keyValues: ArrayType1D = []
      for (let pos = 0; pos < this.colIndex.length; pos++) {
        keyValues.push(row[this.colIndex[pos] as number])
      }
      const groupKey = keyValues.join('-')

      if (!Groupby.hasOwn(keyToValue, groupKey)) {
        keyToValue[groupKey] = keyValues
      }

      if (Groupby.hasOwn(colDict, groupKey)) {
        const groupData = colDict[groupKey]
        for (let pos = 0; pos < this.columnName.length; pos++) {
          groupData[this.columnName[pos] as string].push(row[pos])
        }
      } else {
        const groupData: { [key: string ]: ArrayType1D } = {}
        for (let pos = 0; pos < this.columnName.length; pos++) {
          groupData[this.columnName[pos] as string] = [row[pos]]
        }
        colDict[groupKey] = groupData
      }
    }

    this.colDict = colDict
    this.keyToValue = keyToValue
    return this
  }

  /**
   * Generate new internal groupby data restricted to the given columns
   * group = df.groupby(['A', 'B']).col(['C'])
   * This filters the colDict property generated by `.group()` so that
   * each group only contains column `C` in its internal object
   * e.g
   * {
   *  '1-2': {A: [ 1 ], B: [ 2 ], C: [ 3 ]},
   *  '4-5': {A: [ 4 ], B: [ 5 ], C: [ 6 ]}
   * }
   * to
   * {
   *  '1-2': { C: [ 3 ]},
   *  '4-5': {C: [ 6 ]}
   * } 
   * @param colNames column names to keep; when undefined every column that is
   * not a grouping column is kept
   * @returns a new Groupby holding the filtered groups (this instance is not modified)
   * @throws Error when a requested column is not one of the DataFrame columns
   */
  col(colNames: ArrayType1D | undefined): Groupby {
    
    if (typeof colNames === "undefined") {
      colNames = this.columnName.filter((_, index)=>{
        return !this.colIndex.includes(index)
      })
    }
    for (const name of colNames) {
      if (!this.columnName.includes(name)) {
        throw new Error(`Column ${name} does not exist in groups`)
      }
    }
    const selected = colNames as Array<string>

    const colDict: { [key: string ]: {} } = {}
    for (const [key, values] of Object.entries(this.colDict)) {
      const groupValues: any = values
      const picked: { [key: string ]: ArrayType1D } = {}
      for (const colName of selected) {
        picked[colName] = groupValues[colName]
      }
      colDict[key] = picked
    }

    const gp = new Groupby(
      this.keyCol,
      null,
      this.columnName,
      this.colDtype,
      this.colIndex
    )
    gp.colDict = colDict
    gp.groupColNames = selected
    gp.keyToValue = this.keyToValue

    return gp
  }

  /**
   * Perform all groupby arithmetic operations
   * Each group is a plain javascript object and every arithmetic operation
   * is done directly on javascript arrays (a previous implementation stored
   * each group as a DataFrame, which used a lot of memory).
   * e.g 
   * using this internal data 
   * {
   *  '1-2': {A: [ 1,3 ], B: [ 2,5 ], C: [ 3, 5 ]},
   *  '4-5': {A: [ 4,1 ], B: [ 5,0 ], C: [ 6, 12 ]}
   * }
   * 1) using groupby(['A', 'B']).arithemetic("mean")
   * result:
   * {
   *  '1-2': {A_mean: [ 2 ], B_mean: [ 3.5 ], C_mean: [ 4 ]},
   *  '4-5': {A_mean: [ 2.5 ], B_mean: [ 2.5 ], C_mean: [ 9 ]}
   * }
   * 2) .arithemetic({
   *    A: 'mean',
   *    B: 'sum',
   *    C: 'min'
   * })
   * result: 
   * {
   *  '1-2': {A_mean: [ 2 ], B_sum: [ 7 ], C_min: [ 3 ]},
   *  '4-5': {A_mean: [ 2.5 ], B_sum: [ 5 ], C_min: [ 6 ]}
   * }
   * 3) .arithemetic({
   *    A: 'mean',
   *    B: 'sum',
   *    C: ['min', 'max']
   * })
   * result:
   * {
   *  '1-2': {A_mean: [ 2 ], B_sum: [ 7 ], C_min: [ 3 ], C_max: [5]},
   *  '4-5': {A_mean: [ 2.5 ], B_sum: [ 5 ], C_min: [ 6 ], C_max: [12]}
   * }
   * @param operation a single operation name applied to every selected column,
   * or an object mapping column names to one operation or a list of operations
   * @returns the groups with every selected column replaced by its `<column>_<operation>` results
   * @throws Error when an operation name is not supported, or when an operation
   * other than "count" is requested for a column whose dtype is "string"
   */
  private arithemetic(operation: {[key: string] : Array<string> | string} | string): { [key: string ]: {} } {

    const opsName = [ "mean", "sum", "count", "mode", "std", "var", "cumsum", "cumprod",
    "cummax", "cummin", "median" , "min", "max"];
    if (typeof operation === "string" ) {
      if (!opsName.includes(operation)) {
        throw new Error(`group operation: ${operation} is not valid`)
      }
    } else {
      for (const key of Object.keys(operation)) {
        const requested = operation[key]
        const names = Array.isArray(requested) ? requested : [requested]
        for (const op of names) {
          if (!opsName.includes(op)) {
            throw new Error(`group operation: ${op} for column ${key} is not valid`)
          }
        }
      }
    }

    const groupColNames: Array<string> = this.groupColNames as Array<string>
    const colDict: { [key: string ]: {} } = {}
    for (const [key, values] of Object.entries(this.colDict)) {
      const colVal: { [key: string ]: Array<number> } = {}
      const keyVal: any = values
      for (const colName of groupColNames) {
        const colDtype = this.colDtype[this.columnName.indexOf(colName)]
        const requested = (typeof operation === "string") ? operation : operation[colName]
        // normalise to a list so a string column is validated against every
        // requested operation, not against the array as a whole
        const opList = Array.isArray(requested) ? requested : [requested]
        if (colDtype === "string" && opList.some((op) => op !== "count")) {
          throw new Error(`Can't perform math operation on column ${colName}`)
        }
        for (const op of opList) {
          colVal[`${colName}_${op}`] = this.groupMathLog(keyVal[colName], op)
        }
      }
      colDict[key] = colVal
    }
    return colDict
  }

  /**
   * Perform the arithmetic logic of a single operation on one column of one group
   * @param colVal values of the column within the group
   * @param ops operation name
   * @returns a one-element array for the reducing operations, an array as long
   * as `colVal` for the cumulative ones, and an empty array for an unknown name
   */
  private groupMathLog(colVal: Array<number>, ops: string): Array<number>{
    const data: any[] = []
    switch(ops) {
      case "max":
        data.push(colVal.reduce((prev, curr) => (prev > curr ? prev : curr)))
        break;
      case "min":
        data.push(colVal.reduce((prev, curr) => (prev < curr ? prev : curr)))
        break;
      case "sum":
        data.push(colVal.reduce((prev, curr) => prev + curr))
        break;
      case "count":
        data.push(colVal.length)
        break;
      case "mean": {
        const total = colVal.reduce((prev, curr) => prev + curr)
        data.push(total / colVal.length)
        break;
      }
      case "std":
        data.push(std(colVal))
        break;
      case "var":
        data.push(variance(colVal))
        break;
      case "median":
        data.push(median(colVal))
        break;
      case "mode":
        // NOTE(review): mathjs `mode` is documented to return an array of modes,
        // so the element pushed here is presumably itself an array - confirm
        // that this is what consumers of the result expect.
        data.push(mode(colVal))
        break;
      case "cumsum": {
        let running = 0
        for (const curr of colVal) {
          running = running + curr
          data.push(running)
        }
        break;
      }
      case "cummin": {
        let running = colVal[0]
        data.push(running)
        for (const curr of colVal.slice(1)) {
          if (!(running < curr)) {
            running = curr
          }
          data.push(running)
        }
        break;
      }
      case "cummax": {
        let running = colVal[0]
        data.push(running)
        for (const curr of colVal.slice(1)) {
          if (!(running > curr)) {
            running = curr
          }
          data.push(running)
        }
        break;
      }
      case "cumprod": {
        let running = 1
        for (const curr of colVal) {
          running = running * curr
          data.push(running)
        }
        break;
      }
    }
    return data
  }

  /**
   * Takes the internal groupby data and converts it to a single DataFrame.
   * Every group contributes its key values (repeated to the length of the
   * group's first column) followed by its own columns.
   * @param colDict group key -> column name -> values
   * @returns DataFrame built from the concatenated groups
   */
  private toDataFrame(colDict: { [key: string ]: {} }): DataFrame {
    const data:  { [key: string ]: ArrayType1D } = {}
    let columns: string[] = []

    for (const key of this.colKeyDict(colDict)) {
      const value = colDict[key] as { [key: string ]: ArrayType1D }
      const keyDict: { [key: string ]: ArrayType1D } = {}
      const oneValue = Object.values(value)[0] as ArrayType1D
      const valueLen = oneValue.length
      for (let pos = 0; pos < this.keyCol.length; pos++) {
        const keyName = this.keyCol[pos] as string
        const keyValue = this.keyToValue[key][pos]
        keyDict[keyName] = Array(valueLen).fill(keyValue)
      }
      const combine: { [key: string ]: ArrayType1D } = {...keyDict, ...value}
      if (columns.length < 1) {
        // the first group fixes the column set; copy its arrays so the later
        // in-place appends never touch the arrays stored in the groups
        columns = Object.keys(combine)
        for (const name of columns) {
          data[name] = combine[name].slice()
        }
      } else {
        // append in place instead of re-spreading the accumulated column,
        // which would cost quadratic time in the number of groups
        for (const name of columns) {
          const target = data[name]
          for (const item of combine[name]) {
            target.push(item)
          }
        }
      }
    }
    return new DataFrame(data)
  }

  /**
   * Run one named operation on every selected column of every group.
   * When no column was selected through `col`, all non-grouping columns are used.
   * @param ops operation name
   * @returns DataFrame
   */
  private operations(ops: string): DataFrame {
    if (!this.groupColNames) {
      const colGroup = this.col(undefined)
      return colGroup.toDataFrame(colGroup.arithemetic(ops))
    }
    return this.toDataFrame(this.arithemetic(ops))
  }

  /**
   * Obtain the count for each group
   * @returns DataFrame
   * 
   */
  count(): DataFrame {
    return this.operations("count")
  }

  /**
   * Obtain the sum of columns for each group
   * @returns DataFrame
   * 
   */
  sum(): DataFrame{
    return this.operations("sum")
  }

  /**
   * Obtain the standard deviation of columns for each group
   * @returns DataFrame
   */
  std(): DataFrame{
    return this.operations("std")
  }

  /**
   * Obtain the variance of columns for each group
   * @returns DataFrame
   */
  var(): DataFrame{
    return this.operations("var")
  }

  /**
   * Obtain the mean of columns for each group
   * @returns DataFrame
   */
  mean(): DataFrame{
    return this.operations("mean")
  }

  /**
   * Obtain the cumsum of columns for each group
   * @returns DataFrame
   * 
   */
  cumSum(): DataFrame{
    return this.operations("cumsum")
  }

  /**
   * Obtain the cummax of columns for each group
   * @returns DataFrame
   */
  cumMax(): DataFrame{
    return this.operations("cummax")
  }

  /**
   * Obtain the cumprod of columns for each group
   * @returns DataFrame
   */
  cumProd(): DataFrame{
    return this.operations("cumprod")
  }

  /**
   * Obtain the cummin of columns for each group
   * @returns DataFrame
   */
  cumMin(): DataFrame{
    return this.operations("cummin")
  }

  /**
   * Obtain the max value of columns for each group
   * @returns DataFrame
   * 
   */
  max(): DataFrame{
    return this.operations("max")
  }

  /**
   * Obtain the min of columns for each group
   * @returns DataFrame
   */
  min(): DataFrame{
    return this.operations("min")
  }

  /**
   * Obtain a specific group
   * @param keys key values identifying the group, in grouping-column order
   * @returns DataFrame
   * @throws Error when no group exists for the given key values
   */
  getGroup(keys: Array<string | number>): DataFrame {
    const dictKey = keys.join("-")
    if (!Groupby.hasOwn(this.colDict, dictKey)) {
      throw new Error(`Group ${dictKey} does not exist in groups`)
    }
    const colDict: { [key: string ]: {} }  = {}
    colDict[dictKey] = {...this.colDict[dictKey]}
    return this.toDataFrame(colDict)
  }

  /**
   * Perform aggregation on all groups
   * @param ops object mapping column names to one operation or a list of operations
   * @returns DataFrame
   */
  agg(ops: { [key: string ]: Array<string> | string }): DataFrame {
    const columns = Object.keys(ops);
    const colGroup = this.col(columns);
    const data = colGroup.arithemetic(ops);
    return colGroup.toDataFrame(data);
  }

  /**
   * Apply custom aggregator function
   * to each group
   * @param callable function receiving the DataFrame of one group
   * @returns DataFrame
   * @example
   * let grp = df.groupby(['A'])
   * grp.apply((x) => x.count())
   */
  apply(callable: (x: DataFrame)=> DataFrame | Series ): DataFrame {
    const colDict: { [key: string ]: DataFrame | Series } = {}
    for (const key of this.colKeyDict(this.colDict)) {
      const valDataframe = new DataFrame(this.colDict[key])
      colDict[key] = callable(valDataframe)
    }
    return this.concatGroups(colDict)
  }

  /**
   * Combine the per-group results of `apply` into one DataFrame, inserting
   * the key values of each group as leading columns.
   * @param colDict group key -> result returned by the callable for that group
   * @returns DataFrame
   */
  private concatGroups(colDict: {[key: string]: DataFrame | Series}): DataFrame {
    const data: Array<DataFrame | Series> = []
    for (const [key, values] of Object.entries(colDict)) {
      let copyDf: DataFrame;
      if (values instanceof DataFrame) {
        copyDf = values.copy()
      } 
      else {
        // a Series result becomes a one-row frame; its index supplies the
        // column names unless it has a single entry
        let columns = values.index as string[]
        columns = columns.length > 1 ? columns : ['applyOps']
        copyDf = new DataFrame([values.values], {columns: columns })
      }
      const len = copyDf.shape[0]
      for (let pos = 0; pos < this.keyCol.length; pos++) {
        const keyName = this.keyCol[pos] as string
        const keyValue = this.keyToValue[key][pos]
        const dfValue = Array(len).fill(keyValue)
        // key columns carry a `_Group` suffix when no column was selected through `col`
        const name = this.groupColNames ? keyName : `${keyName}_Group`
        copyDf.addColumn(name, dfValue, {inplace: true, atIndex: pos })
      }
      data.push(copyDf)
    }
    return concat({dfList: data, axis:0}) as DataFrame
  }
  
  /**
   * Obtain the total number of groups
   * @returns number
   */
  get ngroups(): number{
    return Object.keys(this.colDict).length
  }

  /**
   * Obtain the internal group data
   * @returns  {[keys: string]: {}}
   */
  get groups(): {[keys: string]: {}}{
    return this.colDict
  }

  /**
   * Obtain the first row of each group
   * @returns DataFrame
   */
  first(): DataFrame{
    return this.apply((x)=>{
      return x.head(1)
    })
  }

  /**
   * Obtain the last row of each group
   * @returns DataFrame
   */
  last(): DataFrame {
    return this.apply((x)=>{
      return x.tail(1)
    })
  }

  /**
   * Obtain the size (number of rows) of each group
   * @returns DataFrame
   */
  size(): DataFrame {
    return this.apply((x)=>{
      return new Series([x.shape[0]])
    })
  }

  /**
   * Order the group keys so that groups sharing the same first key value
   * are adjacent. Buckets are emitted in the `Object.keys` order of the
   * bucket map; keys inside a bucket keep their order in `colDict`.
   * @param colDict group key -> group data
   * @returns the group keys of `colDict`, reordered
   */
  private colKeyDict(colDict: { [key: string ]: {} }): string[]{
    const keyDict :{ [key: string ]: string[] } = {}

    for (const key of Object.keys(colDict)) {
      let firstKey: string
      if (Groupby.hasOwn(this.keyToValue, key) && this.keyToValue[key].length > 0) {
        // use the recorded key values: splitting the joined key on "-" breaks
        // for negative numbers and for values that contain "-" themselves
        const firstValue = this.keyToValue[key][0]
        // mirror Array.prototype.join, which renders null/undefined as ""
        firstKey = firstValue == null ? "" : String(firstValue)
      } else {
        firstKey = key.split("-")[0]
      }
      if (Groupby.hasOwn(keyDict, firstKey)) {
        keyDict[firstKey].push(key)
      }
      else {
        keyDict[firstKey] = [key]
      }
    }
    const keys: string[] = []
    for (const firstKey of Object.keys(keyDict)) {
      keys.push(...keyDict[firstKey])
    }
    return keys
  }

}